perm filename DIPDAT.SAI[5,ALS] blob
sn#001155 filedate 1972-01-27 generic text, type T, neo UTF8
00010 BEGIN "DIPDAT" COMMENT 21-JAN-72;
00020 COMMENT Reads disk file "PHON" containing words and phonetic
00030 transcriptions. Creates a disk file containing counts of the number
00040 of times each diphone is used in the reviewed list.
00050 ;
00060
00070 REQUIRE "MACROS[SYS,JKS]" SOURCE_FILE;
00080 REQUIRE "COMSUB.HDR[SYS,JKS]" SOURCE_FILE;
00090
00100 INTEGER IIII,JJJJ,KKKK,LL,QQ,XXXX,YYYY;
00110 INTEGER ARRAY AAAA,BBBB[0:700];
00120 INTEGER BRK,BRK1,EOF,I,L,COUNT,CHOICE,WORDNO;
00130 STRING TSTR,DPH1,DPH2,PH1,PH2,LIN,WORD,LIN1;
00140 STRING LIST;
00150
00160 SETBR;
00170 OPEN(DSK,"DSK",1,2,0,120,BRK,EOF);
00180 OPEN(DSKO,"DSK",1,0,2,120,BRK,EOF);
00190 OPEN(TTY,"TTY",1,1,1,120,BRK,EOF);
00200
00210 LOOKIN(DSK,TSTR←"PHON");
00220 ENTEROUT(DSKO,TSTR←"DIPDAT.LST");
00230 OUT(DSKO,DATIME("ALL")&TB&"Unweighted Diphone count"&CR&LF&LF);
00240
00250 EOF ← FALSE; KKKK←COUNT ← WORDNO ← 0;
00260
00270 WHILE ¬EOF DO
00280 BEGIN "READ"
00290 LIN ← INPUT(DSK,1);
00300 TSTR ← SCAN(LIN,6,BRK);
00310 IF TSTR≠NULL THEN BEGIN WORD ← TSTR; WORDNO ← WORDNO + 1 END;
00320 LIN1←WORD&TB&LIN;
00330 IF LIN[1 FOR 1]=TB THEN LIN←LIN[2 TO ∞];
00340 TSTR←SCAN(LIN,6,BRK);
00350 TSTR←SCAN(LIN,6,BRK);
00360 TSTR←SCAN(LIN,6,BRK);
00370 LIN←LIN&CR;
00380 BRK ← 0; PH1←SCAN(LIN,10,BRK); PH2 ← " ";
00390 WHILE (LENGTH(LIN)>0) AND ¬EQU(PH2,"XX") DO
00400 BEGIN
00410 IF BRK≠CR THEN
00420 BEGIN
00430 IF BRK=TB THEN PH1←SCAN(LIN,10,BRK);
00440 PH2 ← SCAN(LIN,10,BRK);
00450 END
00460 ELSE PH2 ← "XX";
00470
00480 XXXX←CVASC(PH1&"-"&PH2);
00490 FOR I←0 STEP 1 UNTIL 699 DO
00500 BEGIN
00510 IF AAAA[I]=0 THEN DONE;
00520 IF XXXX=BBBB[I] THEN
00530 BEGIN
00540 AAAA[I]←AAAA[I]+1; KKKK←KKKK+1;
00550 DONE
00560 END;
00570 END;
00580 IF AAAA[I]=0 THEN
00590 BEGIN
00600 AAAA[I]←1; BBBB[I]←XXXX; COUNT←COUNT+1; KKKK←KKKK+1;
00610 END;
00620 PH1←PH2;
00630 END;
00640 IF INCHRS=" " THEN
00650 BEGIN OUT(TTY," RUN ABORTED AT WORD # "&CVS(WORDNO)&" WITH "&CVS(COUNT)&" DIPHONES."); DONE END; C SPACE STOPS RUN;
00660 END "READ";
00670 OUT(DSKO,"Usage counts in 10% groups for the "&CVS(COUNT)&" diphones in "
00680 &CVS(WORDNO)&" words used by 2 or more speakers in Jones and Wexman"&CRLF&LF);
00690 KKKK←KKKK%10+1;
00700
00710 COMMENT We must now sort file in decending order of A[I];
00720 FOR JJJJ←698 STEP -1 UNTIL 0 DO
00730 FOR I←JJJJ STEP 1 UNTIL 699 DO
00740 BEGIN
00750 IF AAAA[I]>AAAA[I+1] THEN DONE;
00760 IF AAAA[I]=AAAA[I+1] THEN IF BBBB[I]≤BBBB[I+1] THEN DONE;
00770 XXXX←AAAA[I]; AAAA[I]←AAAA[I+1]; AAAA[I+1]←XXXX;
00780 XXXX←BBBB[I]; BBBB[I]←BBBB[I+1]; BBBB[I+1]←XXXX;
00790 END;
00800
00810 COMMENT Now prepare listing;
00820 I←0; COUNT←2; SETFORMAT(5,0); LL←AAAA[I]; QQ←1;
00830
00840 OUT(DSKO,CVS(AAAA[I])&TB&CVSTR(BBBB[I])&TB);
00850 FOR I←1 STEP 1 UNTIL 699 DO
00860 BEGIN
00870 IF AAAA[I]=0 THEN DONE;
00880 IF AAAA[I]≠AAAA[I-1] THEN
00890 BEGIN
00900 IF COUNT MOD 2 =1 THEN
00910 BEGIN
00920 OUT(DSKO,TB); COUNT←COUNT+1;
00930 END;
00940 IF COUNT≥3 THEN BEGIN
00950 FOR L←1 STEP 1 UNTIL 14 DO
00960 IF AAAA[I]≠AAAA[I+L] THEN DONE;
00970 IF COUNT+ L≥14 THEN
00980 BEGIN
00990 OUT(DSKO,CRLF);
01000 COUNT←0; JJJJ←AAAA[I];
01010 END;
01020 END;
01030 IF COUNT≥13 THEN
01040 BEGIN
01050 OUT(DSKO,CRLF);
01060 COUNT←0; JJJJ←AAAA[I];
01070 END;
01080 OUT(DSKO,CVS(AAAA[I])&TB);
01090 COUNT←COUNT+1;
01100 END
01110 ELSE
01120 BEGIN
01130 IF COUNT≥14 THEN
01140 BEGIN
01150 OUT(DSKO,CRLF);
01160 IF AAAA[I]≠JJJJ THEN
01170 BEGIN
01180 JJJJ←AAAA[I];
01190 OUT(DSKO,CVS(AAAA[I]));
01200 END;
01210 OUT(DSKO,TB);
01220 COUNT←1;
01230 END;
01240 END;
01250 OUT(DSKO,CVSTR(BBBB[I])&TB);
01260 COUNT←COUNT+1;
01270 LL←LL+AAAA[I]; QQ←QQ+1;
01280 IF LL≥KKKK THEN BEGIN
01290 LL←LL-KKKK; OUT(DSKO,CRLF&"****"&TB&CVS(QQ)&" Diphones"); COUNT←14; END;
01300 END;
01310 CLOSE(DSK);
01320
01330 OUT(DSKO,CR&FF&"Diphones found more than once in "&cvs(wordno)
01340 &" words used by 2 or more speakers in Jones and Wexman"&CRLF&LF);
01350 I←0; WHILE AAAA[I]>1 DO I←I+1; IIII←I;
01360
01370 SETFORMAT(1,0);
01380 FOR JJJJ←IIII-2 STEP -1 UNTIL 0 DO
01390 FOR I←JJJJ STEP 1 UNTIL IIII-2 DO
01400 IF BBBB[I]>BBBB[I+1] THEN BEGIN
01410 XXXX←BBBB[I]; BBBB[I]←BBBB[I+1]; BBBB[I+1]←XXXX; END
01420 ELSE DONE;
01430
01440 COUNT←0;
01450 FOR I←0 STEP 1 UNTIL IIII-1 DO
01460 BEGIN
01470 OUT(DSKO,CVSTR(BBBB[I])&TB);
01480 COUNT←COUNT+1;
01490 IF COUNT≥14 THEN BEGIN OUT(DSKO,CRLF); COUNT←0; END;
01500 END;
01510
01520 OUT(DSKO,CR&FF&"Diphones found in only one word of "&CVS(WORDNO)
01530 &" words used by 2 or more speakers in Jones and Wexman"&CRLF&LF);
01540
01550 OPEN(DSK,"DSK",1,2,0,120,BRK,EOF);
01560 LOOKIN(DSK,TSTR←"PHON");
01570 EOF ← FALSE; COUNT ← WORDNO ← 0;
01580
01590 WHILE ¬EOF DO
01600 BEGIN "REVIEW"
01610 LIN ← INPUT(DSK,1);
01620 TSTR ← SCAN(LIN,6,BRK);
01630 IF TSTR≠NULL THEN BEGIN WORD ← TSTR; WORDNO ← WORDNO + 1 END;
01640 LIN1←WORD&TB&LIN;
01650 IF LIN[1 FOR 1]=TB THEN LIN←LIN[2 TO ∞];
01660 TSTR←SCAN(LIN,6,BRK);
01670 TSTR←SCAN(LIN,6,BRK);
01680 TSTR←SCAN(LIN,6,BRK);
01690 LIN←LIN&CR;
01700 BRK ← 0; PH1←SCAN(LIN,10,BRK); PH2 ← " ";
01710 WHILE (LENGTH(LIN)>0) AND ¬EQU(PH2,"XX") DO
01720 BEGIN
01730 IF BRK≠CR THEN
01740 BEGIN
01750 IF BRK=TB THEN PH1←SCAN(LIN,10,BRK);
01760 PH2 ← SCAN(LIN,10,BRK);
01770 END
01780 ELSE PH2 ← "XX";
01790
01800 XXXX←CVASC(PH1&"-"&PH2);
01810 FOR I←IIII STEP 1 UNTIL 699 DO
01820 BEGIN
01830 IF AAAA[I]=0 THEN DONE;
01840 IF XXXX=BBBB[I] THEN
01850 BEGIN
01860 OUT(DSKO,CVSTR(BBBB[I])&TB&WORD&CRLF);
01870 DONE
01880 END;
01890 END;
01900 PH1←PH2;
01910 END;
01920 END "REVIEW";
01930
01940 XXXX←CVASC("ZZZZZ");
01950 LIST← "A AA AE AR AW B CH D DH E EE F G H I J K L M N NG O OO P R S SH T TH U V W Y Z ZH"&CR;
01960 FOR I←699 STEP -1 UNTIL 0 DO
01970 IF AAAA[I]=0 THEN BBBB[I]←XXXX ELSE DONE;
01980 KKKK←I;
01990
02000 FOR JJJJ←KKKK-1 STEP -1 UNTIL 0 DO
02010 FOR I←JJJJ STEP 1 UNTIL KKKK-1 DO
02020 IF BBBB[I]>BBBB[I+1] THEN BEGIN
02030 XXXX←BBBB[I]; BBBB[I]←BBBB[I+1]; BBBB[I+1]←XXXX; END
02040 ELSE DONE;
02050
02060 OUT(DSKO,CR&FF&"All diphones found in "
02070 &CVS(WORDNO)&" words used by 2 or more speakers in Jones and Wexman"&CRLF&LF);
02080 COUNT←0;
02090 FOR I←0 STEP 1 UNTIL 699 DO
02100 BEGIN
02110 IF AAAA[I]=0 THEN DONE;
02120 OUT(DSKO,CVSTR(BBBB[I])&TB);
02130 COUNT←COUNT+1;
02140 IF COUNT≥14 THEN BEGIN OUT(DSKO,CRLF); COUNT←0; END;
02150 END;
02160 CLOSE(DSK);
02170
02180 OUT(DSKO,CR&FF&"Diphones not found in "
02190 &CVS(WORDNO)&" words used by 2 or more speakers in Jones and Wexman"&CRLF&LF);
02200 COUNT←0; I←0; LIN←LIST&CR; BRK←0;
02210
02220 WHILE (LENGTH(LIN)>0) DO
02230 BEGIN
02240 IF BRK=CR THEN DONE;
02250 PH1←SCAN(LIN,10,BRK);
02260 LIN1←LIST&CR;
02270 BRK1←0;
02280 WHILE (LENGTH(LIN1)>0) DO
02290 BEGIN
02300 IF BRK1=CR THEN DONE;
02310 PH2←SCAN(LIN1,10,BRK1);
02320 XXXX←CVASC(PH1&"-"&PH2);
02330 IF XXXX>BBBB[I] THEN
02340 BEGIN
02350 OUT(DSKO,CRLF&CVSTR(BBBB[I])&" encountered as unidentified"&CRLF);
02360 OUT(TTY,CRLF&CVSTR(BBBB[I])&" encountered as unidentified"&CRLF);
02370 I←I+1;
02380 END;
02390 IF XXXX<BBBB[I] THEN
02400 BEGIN
02410 OUT(DSKO,CVSTR(XXXX)&TB);
02420 COUNT←COUNT+1;
02430 IF COUNT≥14 THEN BEGIN OUT(DSKO,CRLF); COUNT←0; END;
02440 END;
02450 IF XXXX=BBBB[I] THEN I←I+1;
02460 END;
02470 END;
02480
02490
02500
02510 CLOSE(DSKO);
02520 CLOSE(DSK);
02530 OUT(TTY,CRLF&"OUTPUT FILE: DIPDAT.LST");
02540
02550 END "DIPDAT";